package com.qingyun.service.compare;

import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.TextPosition;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Custom text stripper that collects position information.
 *
 * <p>Each {@link TextPosition} handed to {@link #writeString(String, List)} is wrapped in a
 * {@code TextElement} together with the page number recorded by the latest
 * {@link #startPage(PDPage)} call. The wrapped elements are kept in an internal list that is
 * exposed through {@link #getTextElements()}.
 */
public class PositionTextStripper extends PDFTextStripper {
    /** Page number captured by the most recent {@link #startPage(PDPage)} call; 1 until then. */
    private int pageNo = 1;

    /** Elements gathered so far, in the order they were received. */
    private final List<TextElement> collected = new ArrayList<>();

    /**
     * Creates the stripper and turns on the base class's sort-by-position option.
     *
     * @throws IOException declared because the superclass constructor declares it
     */
    public PositionTextStripper() throws IOException {
        // The no-arg superclass constructor runs implicitly before this statement.
        super.setSortByPosition(true);
    }

    /**
     * Returns the elements gathered so far.
     *
     * @return the internal list itself (not a copy), so later additions are visible through it
     */
    public List<TextElement> getTextElements() {
        return collected;
    }

    /**
     * Remembers the page number reported by {@link #getCurrentPageNo()} so that subsequently
     * collected elements can be tagged with it.
     *
     * @param page the page being started; not inspected here
     */
    @Override
    protected void startPage(PDPage page) {
        this.pageNo = getCurrentPageNo();
    }

    /**
     * Records one {@code TextElement} per entry of {@code textPositions}, each tagged with the
     * currently remembered page number. The superclass implementation is not invoked.
     *
     * @param text          the string being written; unused by this override
     * @param textPositions the positions to record
     */
    @Override
    protected void writeString(String text, List<TextPosition> textPositions) {
        final int page = this.pageNo;
        for (int i = 0, count = textPositions.size(); i < count; i++) {
            collected.add(new TextElement(textPositions.get(i), page));
        }
    }
}
